{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "11d2140e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3ef3dae3",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv(\"./data/movie_metadata.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "752bd100",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Raimi</td>\n",
       "      <td>392.0</td>\n",
       "      <td>156.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4000.0</td>\n",
       "      <td>James Franco</td>\n",
       "      <td>24000.0</td>\n",
       "      <td>336530303.0</td>\n",
       "      <td>Action|Adventure|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>1902.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>258000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Color</td>\n",
       "      <td>Nathan Greno</td>\n",
       "      <td>324.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>284.0</td>\n",
       "      <td>Donna Murphy</td>\n",
       "      <td>799.0</td>\n",
       "      <td>200807262.0</td>\n",
       "      <td>Adventure|Animation|Comedy|Family|Fantasy|Musi...</td>\n",
       "      <td>...</td>\n",
       "      <td>387.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>260000000.0</td>\n",
       "      <td>2010.0</td>\n",
       "      <td>553.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>1.85</td>\n",
       "      <td>29000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Color</td>\n",
       "      <td>Joss Whedon</td>\n",
       "      <td>635.0</td>\n",
       "      <td>141.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19000.0</td>\n",
       "      <td>Robert Downey Jr.</td>\n",
       "      <td>26000.0</td>\n",
       "      <td>458991599.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>1117.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>21000.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>118000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Color</td>\n",
       "      <td>David Yates</td>\n",
       "      <td>375.0</td>\n",
       "      <td>153.0</td>\n",
       "      <td>282.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>Daniel Radcliffe</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>301956980.0</td>\n",
       "      <td>Adventure|Family|Fantasy|Mystery</td>\n",
       "      <td>...</td>\n",
       "      <td>973.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>10000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews  duration  \\\n",
       "0  Color      James Cameron                   723.0     178.0   \n",
       "1  Color     Gore Verbinski                   302.0     169.0   \n",
       "2  Color         Sam Mendes                   602.0     148.0   \n",
       "3  Color  Christopher Nolan                   813.0     164.0   \n",
       "4    NaN        Doug Walker                     NaN       NaN   \n",
       "5  Color     Andrew Stanton                   462.0     132.0   \n",
       "6  Color          Sam Raimi                   392.0     156.0   \n",
       "7  Color       Nathan Greno                   324.0     100.0   \n",
       "8  Color        Joss Whedon                   635.0     141.0   \n",
       "9  Color        David Yates                   375.0     153.0   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes       actor_2_name  \\\n",
       "0                      0.0                   855.0   Joel David Moore   \n",
       "1                    563.0                  1000.0      Orlando Bloom   \n",
       "2                      0.0                   161.0       Rory Kinnear   \n",
       "3                  22000.0                 23000.0     Christian Bale   \n",
       "4                    131.0                     NaN         Rob Walker   \n",
       "5                    475.0                   530.0    Samantha Morton   \n",
       "6                      0.0                  4000.0       James Franco   \n",
       "7                     15.0                   284.0       Donna Murphy   \n",
       "8                      0.0                 19000.0  Robert Downey Jr.   \n",
       "9                    282.0                 10000.0   Daniel Radcliffe   \n",
       "\n",
       "   actor_1_facebook_likes        gross  \\\n",
       "0                  1000.0  760505847.0   \n",
       "1                 40000.0  309404152.0   \n",
       "2                 11000.0  200074175.0   \n",
       "3                 27000.0  448130642.0   \n",
       "4                   131.0          NaN   \n",
       "5                   640.0   73058679.0   \n",
       "6                 24000.0  336530303.0   \n",
       "7                   799.0  200807262.0   \n",
       "8                 26000.0  458991599.0   \n",
       "9                 25000.0  301956980.0   \n",
       "\n",
       "                                              genres  ...  \\\n",
       "0                    Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                           Action|Adventure|Fantasy  ...   \n",
       "2                          Action|Adventure|Thriller  ...   \n",
       "3                                    Action|Thriller  ...   \n",
       "4                                        Documentary  ...   \n",
       "5                            Action|Adventure|Sci-Fi  ...   \n",
       "6                           Action|Adventure|Romance  ...   \n",
       "7  Adventure|Animation|Comedy|Family|Fantasy|Musi...  ...   \n",
       "8                            Action|Adventure|Sci-Fi  ...   \n",
       "9                   Adventure|Family|Fantasy|Mystery  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN      NaN             NaN          NaN   \n",
       "5                738.0  English      USA           PG-13  263700000.0   \n",
       "6               1902.0  English      USA           PG-13  258000000.0   \n",
       "7                387.0  English      USA              PG  260000000.0   \n",
       "8               1117.0  English      USA           PG-13  250000000.0   \n",
       "9                973.0  English       UK              PG  250000000.0   \n",
       "\n",
       "   title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0      2009.0                  936.0        7.9          1.78   \n",
       "1      2007.0                 5000.0        7.1          2.35   \n",
       "2      2015.0                  393.0        6.8          2.35   \n",
       "3      2012.0                23000.0        8.5          2.35   \n",
       "4         NaN                   12.0        7.1           NaN   \n",
       "5      2012.0                  632.0        6.6          2.35   \n",
       "6      2007.0                11000.0        6.2          2.35   \n",
       "7      2010.0                  553.0        7.8          1.85   \n",
       "8      2015.0                21000.0        7.5          2.35   \n",
       "9      2009.0                11000.0        7.5          2.35   \n",
       "\n",
       "  movie_facebook_likes  \n",
       "0                33000  \n",
       "1                    0  \n",
       "2                85000  \n",
       "3               164000  \n",
       "4                    0  \n",
       "5                24000  \n",
       "6                    0  \n",
       "7                29000  \n",
       "8               118000  \n",
       "9                10000  \n",
       "\n",
       "[10 rows x 28 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "274c5a71",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "print(type(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "6a8d88ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "s1 = data[\"director_name\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "4f46e0a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.series.Series'>\n"
     ]
    }
   ],
   "source": [
    "print(type(s1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "3557a6fb",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0           James Cameron\n",
       "1          Gore Verbinski\n",
       "2              Sam Mendes\n",
       "3       Christopher Nolan\n",
       "4             Doug Walker\n",
       "              ...        \n",
       "5038          Scott Smith\n",
       "5039                  NaN\n",
       "5040     Benjamin Roberds\n",
       "5041          Daniel Hsia\n",
       "5042             Jon Gunn\n",
       "Name: director_name, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "d4fd6fbd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    5028.000000\n",
       "mean      107.201074\n",
       "std        25.197441\n",
       "min         7.000000\n",
       "25%        93.000000\n",
       "50%       103.000000\n",
       "75%       118.000000\n",
       "max       511.000000\n",
       "Name: duration, dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[\"duration\"].describebe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d5106ef0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    5028.000000\n",
       "mean      107.201074\n",
       "std        25.197441\n",
       "min         7.000000\n",
       "25%        93.000000\n",
       "50%       103.000000\n",
       "75%       118.000000\n",
       "max       511.000000\n",
       "Name: duration, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.duration.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "c1b86cd9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>director_name</th>\n",
       "      <th>duration</th>\n",
       "      <th>actor_2_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>James Cameron</td>\n",
       "      <td>178.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>169.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>148.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>164.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>87.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>Valorie Curry</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>76.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>100.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>90.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5043 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          director_name  duration      actor_2_name\n",
       "0         James Cameron     178.0  Joel David Moore\n",
       "1        Gore Verbinski     169.0     Orlando Bloom\n",
       "2            Sam Mendes     148.0      Rory Kinnear\n",
       "3     Christopher Nolan     164.0    Christian Bale\n",
       "4           Doug Walker       NaN        Rob Walker\n",
       "...                 ...       ...               ...\n",
       "5038        Scott Smith      87.0     Daphne Zuniga\n",
       "5039                NaN      43.0     Valorie Curry\n",
       "5040   Benjamin Roberds      76.0     Maxwell Moody\n",
       "5041        Daniel Hsia     100.0     Daniel Henney\n",
       "5042           Jon Gunn      90.0  Brian Herzlinger\n",
       "\n",
       "[5043 rows x 3 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[[\"director_name\",\"duration\",\"actor_2_name\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "d3c0e9f2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        James Cameron\n",
       "1       Gore Verbinski\n",
       "2           Sam Mendes\n",
       "3    Christopher Nolan\n",
       "4          Doug Walker\n",
       "Name: director_name, dtype: object"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.director_name[:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "46c790f6",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Raimi</td>\n",
       "      <td>392.0</td>\n",
       "      <td>156.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4000.0</td>\n",
       "      <td>James Franco</td>\n",
       "      <td>24000.0</td>\n",
       "      <td>336530303.0</td>\n",
       "      <td>Action|Adventure|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>1902.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>258000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Color</td>\n",
       "      <td>David Yates</td>\n",
       "      <td>375.0</td>\n",
       "      <td>153.0</td>\n",
       "      <td>282.0</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>Daniel Radcliffe</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>301956980.0</td>\n",
       "      <td>Adventure|Family|Fantasy|Mystery</td>\n",
       "      <td>...</td>\n",
       "      <td>973.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>10000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4688</th>\n",
       "      <td>Color</td>\n",
       "      <td>Steve James</td>\n",
       "      <td>53.0</td>\n",
       "      <td>170.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Arthur Agee</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7830611.0</td>\n",
       "      <td>Documentary|Drama|Sport</td>\n",
       "      <td>...</td>\n",
       "      <td>74.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>700000.0</td>\n",
       "      <td>1994.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>8.3</td>\n",
       "      <td>1.33</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4694</th>\n",
       "      <td>Color</td>\n",
       "      <td>Peter Jackson</td>\n",
       "      <td>446.0</td>\n",
       "      <td>201.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>Thomas Kretschmann</td>\n",
       "      <td>6000.0</td>\n",
       "      <td>218051260.0</td>\n",
       "      <td>Action|Adventure|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>2618.0</td>\n",
       "      <td>English</td>\n",
       "      <td>New Zealand</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>207000000.0</td>\n",
       "      <td>2005.0</td>\n",
       "      <td>918.0</td>\n",
       "      <td>7.2</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4708</th>\n",
       "      <td>Color</td>\n",
       "      <td>Michael Wadleigh</td>\n",
       "      <td>53.0</td>\n",
       "      <td>215.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>136.0</td>\n",
       "      <td>Jimi Hendrix</td>\n",
       "      <td>262.0</td>\n",
       "      <td>13300000.0</td>\n",
       "      <td>Documentary|History|Music</td>\n",
       "      <td>...</td>\n",
       "      <td>63.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>R</td>\n",
       "      <td>600000.0</td>\n",
       "      <td>1970.0</td>\n",
       "      <td>227.0</td>\n",
       "      <td>8.1</td>\n",
       "      <td>2.20</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4747</th>\n",
       "      <td>Black and White</td>\n",
       "      <td>Akira Kurosawa</td>\n",
       "      <td>153.0</td>\n",
       "      <td>202.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Minoru Chiaki</td>\n",
       "      <td>304.0</td>\n",
       "      <td>269061.0</td>\n",
       "      <td>Action|Adventure|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>596.0</td>\n",
       "      <td>Japanese</td>\n",
       "      <td>Japan</td>\n",
       "      <td>Unrated</td>\n",
       "      <td>2000000.0</td>\n",
       "      <td>1954.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>8.7</td>\n",
       "      <td>1.37</td>\n",
       "      <td>11000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4885</th>\n",
       "      <td>Black and White</td>\n",
       "      <td>King Vidor</td>\n",
       "      <td>48.0</td>\n",
       "      <td>151.0</td>\n",
       "      <td>54.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Renée Adorée</td>\n",
       "      <td>81.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Romance|War</td>\n",
       "      <td>...</td>\n",
       "      <td>45.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>USA</td>\n",
       "      <td>Not Rated</td>\n",
       "      <td>245000.0</td>\n",
       "      <td>1925.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>8.3</td>\n",
       "      <td>1.33</td>\n",
       "      <td>226</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>205 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                 color      director_name  num_critic_for_reviews  duration  \\\n",
       "0                Color      James Cameron                   723.0     178.0   \n",
       "1                Color     Gore Verbinski                   302.0     169.0   \n",
       "3                Color  Christopher Nolan                   813.0     164.0   \n",
       "6                Color          Sam Raimi                   392.0     156.0   \n",
       "9                Color        David Yates                   375.0     153.0   \n",
       "...                ...                ...                     ...       ...   \n",
       "4688             Color        Steve James                    53.0     170.0   \n",
       "4694             Color      Peter Jackson                   446.0     201.0   \n",
       "4708             Color   Michael Wadleigh                    53.0     215.0   \n",
       "4747   Black and White     Akira Kurosawa                   153.0     202.0   \n",
       "4885   Black and White         King Vidor                    48.0     151.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes        actor_2_name  \\\n",
       "0                         0.0                   855.0    Joel David Moore   \n",
       "1                       563.0                  1000.0       Orlando Bloom   \n",
       "3                     22000.0                 23000.0      Christian Bale   \n",
       "6                         0.0                  4000.0        James Franco   \n",
       "9                       282.0                 10000.0    Daniel Radcliffe   \n",
       "...                       ...                     ...                 ...   \n",
       "4688                     23.0                     2.0         Arthur Agee   \n",
       "4694                      0.0                    84.0  Thomas Kretschmann   \n",
       "4708                     14.0                   136.0        Jimi Hendrix   \n",
       "4747                      0.0                     4.0       Minoru Chiaki   \n",
       "4885                     54.0                     6.0        Renée Adorée   \n",
       "\n",
       "      actor_1_facebook_likes        gross                            genres  \\\n",
       "0                     1000.0  760505847.0   Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0          Action|Adventure|Fantasy   \n",
       "3                    27000.0  448130642.0                   Action|Thriller   \n",
       "6                    24000.0  336530303.0          Action|Adventure|Romance   \n",
       "9                    25000.0  301956980.0  Adventure|Family|Fantasy|Mystery   \n",
       "...                      ...          ...                               ...   \n",
       "4688                     7.0    7830611.0           Documentary|Drama|Sport   \n",
       "4694                  6000.0  218051260.0    Action|Adventure|Drama|Romance   \n",
       "4708                   262.0   13300000.0         Documentary|History|Music   \n",
       "4747                   304.0     269061.0            Action|Adventure|Drama   \n",
       "4885                    81.0          NaN                 Drama|Romance|War   \n",
       "\n",
       "      ... num_user_for_reviews  language      country  content_rating  \\\n",
       "0     ...               3054.0   English          USA           PG-13   \n",
       "1     ...               1238.0   English          USA           PG-13   \n",
       "3     ...               2701.0   English          USA           PG-13   \n",
       "6     ...               1902.0   English          USA           PG-13   \n",
       "9     ...                973.0   English           UK              PG   \n",
       "...   ...                  ...       ...          ...             ...   \n",
       "4688  ...                 74.0   English          USA           PG-13   \n",
       "4694  ...               2618.0   English  New Zealand           PG-13   \n",
       "4708  ...                 63.0   English          USA               R   \n",
       "4747  ...                596.0  Japanese        Japan         Unrated   \n",
       "4885  ...                 45.0       NaN          USA       Not Rated   \n",
       "\n",
       "           budget  title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0     237000000.0      2009.0                  936.0        7.9          1.78   \n",
       "1     300000000.0      2007.0                 5000.0        7.1          2.35   \n",
       "3     250000000.0      2012.0                23000.0        8.5          2.35   \n",
       "6     258000000.0      2007.0                11000.0        6.2          2.35   \n",
       "9     250000000.0      2009.0                11000.0        7.5          2.35   \n",
       "...           ...         ...                    ...        ...           ...   \n",
       "4688     700000.0      1994.0                    6.0        8.3          1.33   \n",
       "4694  207000000.0      2005.0                  918.0        7.2          2.35   \n",
       "4708     600000.0      1970.0                  227.0        8.1          2.20   \n",
       "4747    2000000.0      1954.0                    8.0        8.7          1.37   \n",
       "4885     245000.0      1925.0                   12.0        8.3          1.33   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "3                  164000  \n",
       "6                       0  \n",
       "9                   10000  \n",
       "...                   ...  \n",
       "4688                    0  \n",
       "4694                    0  \n",
       "4708                    0  \n",
       "4747                11000  \n",
       "4885                  226  \n",
       "\n",
       "[205 rows x 28 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[data['duration']>150]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "d8177102",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>num_voted_users</th>\n",
       "      <th>cast_total_facebook_likes</th>\n",
       "      <th>facenumber_in_poster</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>4993.000000</td>\n",
       "      <td>5028.000000</td>\n",
       "      <td>4939.000000</td>\n",
       "      <td>5020.000000</td>\n",
       "      <td>5036.000000</td>\n",
       "      <td>4.159000e+03</td>\n",
       "      <td>5.043000e+03</td>\n",
       "      <td>5043.000000</td>\n",
       "      <td>5030.000000</td>\n",
       "      <td>5022.000000</td>\n",
       "      <td>4.551000e+03</td>\n",
       "      <td>4935.000000</td>\n",
       "      <td>5030.000000</td>\n",
       "      <td>5043.000000</td>\n",
       "      <td>4714.000000</td>\n",
       "      <td>5043.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>140.194272</td>\n",
       "      <td>107.201074</td>\n",
       "      <td>686.509212</td>\n",
       "      <td>645.009761</td>\n",
       "      <td>6560.047061</td>\n",
       "      <td>4.846841e+07</td>\n",
       "      <td>8.366816e+04</td>\n",
       "      <td>9699.063851</td>\n",
       "      <td>1.371173</td>\n",
       "      <td>272.770808</td>\n",
       "      <td>3.975262e+07</td>\n",
       "      <td>2002.470517</td>\n",
       "      <td>1651.754473</td>\n",
       "      <td>6.442138</td>\n",
       "      <td>2.220403</td>\n",
       "      <td>7525.964505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>121.601675</td>\n",
       "      <td>25.197441</td>\n",
       "      <td>2813.328607</td>\n",
       "      <td>1665.041728</td>\n",
       "      <td>15020.759120</td>\n",
       "      <td>6.845299e+07</td>\n",
       "      <td>1.384853e+05</td>\n",
       "      <td>18163.799124</td>\n",
       "      <td>2.013576</td>\n",
       "      <td>377.982886</td>\n",
       "      <td>2.061149e+08</td>\n",
       "      <td>12.474599</td>\n",
       "      <td>4042.438863</td>\n",
       "      <td>1.125116</td>\n",
       "      <td>1.385113</td>\n",
       "      <td>19320.445110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.620000e+02</td>\n",
       "      <td>5.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.180000e+02</td>\n",
       "      <td>1916.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.600000</td>\n",
       "      <td>1.180000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>50.000000</td>\n",
       "      <td>93.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>133.000000</td>\n",
       "      <td>614.000000</td>\n",
       "      <td>5.340988e+06</td>\n",
       "      <td>8.593500e+03</td>\n",
       "      <td>1411.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>65.000000</td>\n",
       "      <td>6.000000e+06</td>\n",
       "      <td>1999.000000</td>\n",
       "      <td>281.000000</td>\n",
       "      <td>5.800000</td>\n",
       "      <td>1.850000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>110.000000</td>\n",
       "      <td>103.000000</td>\n",
       "      <td>49.000000</td>\n",
       "      <td>371.500000</td>\n",
       "      <td>988.000000</td>\n",
       "      <td>2.551750e+07</td>\n",
       "      <td>3.435900e+04</td>\n",
       "      <td>3090.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>156.000000</td>\n",
       "      <td>2.000000e+07</td>\n",
       "      <td>2005.000000</td>\n",
       "      <td>595.000000</td>\n",
       "      <td>6.600000</td>\n",
       "      <td>2.350000</td>\n",
       "      <td>166.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>195.000000</td>\n",
       "      <td>118.000000</td>\n",
       "      <td>194.500000</td>\n",
       "      <td>636.000000</td>\n",
       "      <td>11000.000000</td>\n",
       "      <td>6.230944e+07</td>\n",
       "      <td>9.630900e+04</td>\n",
       "      <td>13756.500000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>326.000000</td>\n",
       "      <td>4.500000e+07</td>\n",
       "      <td>2011.000000</td>\n",
       "      <td>918.000000</td>\n",
       "      <td>7.200000</td>\n",
       "      <td>2.350000</td>\n",
       "      <td>3000.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>813.000000</td>\n",
       "      <td>511.000000</td>\n",
       "      <td>23000.000000</td>\n",
       "      <td>23000.000000</td>\n",
       "      <td>640000.000000</td>\n",
       "      <td>7.605058e+08</td>\n",
       "      <td>1.689764e+06</td>\n",
       "      <td>656730.000000</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>5060.000000</td>\n",
       "      <td>1.221550e+10</td>\n",
       "      <td>2016.000000</td>\n",
       "      <td>137000.000000</td>\n",
       "      <td>9.500000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>349000.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       num_critic_for_reviews     duration  director_facebook_likes  \\\n",
       "count             4993.000000  5028.000000              4939.000000   \n",
       "mean               140.194272   107.201074               686.509212   \n",
       "std                121.601675    25.197441              2813.328607   \n",
       "min                  1.000000     7.000000                 0.000000   \n",
       "25%                 50.000000    93.000000                 7.000000   \n",
       "50%                110.000000   103.000000                49.000000   \n",
       "75%                195.000000   118.000000               194.500000   \n",
       "max                813.000000   511.000000             23000.000000   \n",
       "\n",
       "       actor_3_facebook_likes  actor_1_facebook_likes         gross  \\\n",
       "count             5020.000000             5036.000000  4.159000e+03   \n",
       "mean               645.009761             6560.047061  4.846841e+07   \n",
       "std               1665.041728            15020.759120  6.845299e+07   \n",
       "min                  0.000000                0.000000  1.620000e+02   \n",
       "25%                133.000000              614.000000  5.340988e+06   \n",
       "50%                371.500000              988.000000  2.551750e+07   \n",
       "75%                636.000000            11000.000000  6.230944e+07   \n",
       "max              23000.000000           640000.000000  7.605058e+08   \n",
       "\n",
       "       num_voted_users  cast_total_facebook_likes  facenumber_in_poster  \\\n",
       "count     5.043000e+03                5043.000000           5030.000000   \n",
       "mean      8.366816e+04                9699.063851              1.371173   \n",
       "std       1.384853e+05               18163.799124              2.013576   \n",
       "min       5.000000e+00                   0.000000              0.000000   \n",
       "25%       8.593500e+03                1411.000000              0.000000   \n",
       "50%       3.435900e+04                3090.000000              1.000000   \n",
       "75%       9.630900e+04               13756.500000              2.000000   \n",
       "max       1.689764e+06              656730.000000             43.000000   \n",
       "\n",
       "       num_user_for_reviews        budget   title_year  \\\n",
       "count           5022.000000  4.551000e+03  4935.000000   \n",
       "mean             272.770808  3.975262e+07  2002.470517   \n",
       "std              377.982886  2.061149e+08    12.474599   \n",
       "min                1.000000  2.180000e+02  1916.000000   \n",
       "25%               65.000000  6.000000e+06  1999.000000   \n",
       "50%              156.000000  2.000000e+07  2005.000000   \n",
       "75%              326.000000  4.500000e+07  2011.000000   \n",
       "max             5060.000000  1.221550e+10  2016.000000   \n",
       "\n",
       "       actor_2_facebook_likes   imdb_score  aspect_ratio  movie_facebook_likes  \n",
       "count             5030.000000  5043.000000   4714.000000           5043.000000  \n",
       "mean              1651.754473     6.442138      2.220403           7525.964505  \n",
       "std               4042.438863     1.125116      1.385113          19320.445110  \n",
       "min                  0.000000     1.600000      1.180000              0.000000  \n",
       "25%                281.000000     5.800000      1.850000              0.000000  \n",
       "50%                595.000000     6.600000      2.350000            166.000000  \n",
       "75%                918.000000     7.200000      2.350000           3000.000000  \n",
       "max             137000.000000     9.500000     16.000000         349000.000000  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "a8bc54f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5043 entries, 0 to 5042\n",
      "Data columns (total 28 columns):\n",
      " #   Column                     Non-Null Count  Dtype  \n",
      "---  ------                     --------------  -----  \n",
      " 0   color                      5024 non-null   object \n",
      " 1   director_name              4939 non-null   object \n",
      " 2   num_critic_for_reviews     4993 non-null   float64\n",
      " 3   duration                   5028 non-null   float64\n",
      " 4   director_facebook_likes    4939 non-null   float64\n",
      " 5   actor_3_facebook_likes     5020 non-null   float64\n",
      " 6   actor_2_name               5030 non-null   object \n",
      " 7   actor_1_facebook_likes     5036 non-null   float64\n",
      " 8   gross                      4159 non-null   float64\n",
      " 9   genres                     5043 non-null   object \n",
      " 10  actor_1_name               5036 non-null   object \n",
      " 11  movie_title                5043 non-null   object \n",
      " 12  num_voted_users            5043 non-null   int64  \n",
      " 13  cast_total_facebook_likes  5043 non-null   int64  \n",
      " 14  actor_3_name               5020 non-null   object \n",
      " 15  facenumber_in_poster       5030 non-null   float64\n",
      " 16  plot_keywords              4890 non-null   object \n",
      " 17  movie_imdb_link            5043 non-null   object \n",
      " 18  num_user_for_reviews       5022 non-null   float64\n",
      " 19  language                   5031 non-null   object \n",
      " 20  country                    5038 non-null   object \n",
      " 21  content_rating             4740 non-null   object \n",
      " 22  budget                     4551 non-null   float64\n",
      " 23  title_year                 4935 non-null   float64\n",
      " 24  actor_2_facebook_likes     5030 non-null   float64\n",
      " 25  imdb_score                 5043 non-null   float64\n",
      " 26  aspect_ratio               4714 non-null   float64\n",
      " 27  movie_facebook_likes       5043 non-null   int64  \n",
      "dtypes: float64(13), int64(3), object(12)\n",
      "memory usage: 1.1+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "136788f2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0          USA\n",
       "1          USA\n",
       "2           UK\n",
       "3          USA\n",
       "4             \n",
       "         ...  \n",
       "5038    Canada\n",
       "5039       USA\n",
       "5040       USA\n",
       "5041       USA\n",
       "5042       USA\n",
       "Name: country, Length: 5043, dtype: object"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.country.fillna(\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "12cecc76",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.duration = data.duration.fillna(data.duration.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "2d798e0f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       178.000000\n",
       "1       169.000000\n",
       "2       148.000000\n",
       "3       164.000000\n",
       "4       107.201074\n",
       "           ...    \n",
       "5038     87.000000\n",
       "5039     43.000000\n",
       "5040     76.000000\n",
       "5041    100.000000\n",
       "5042     90.000000\n",
       "Name: duration, Length: 5043, dtype: float64"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.duration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "42558775",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5026</th>\n",
       "      <td>Color</td>\n",
       "      <td>Olivier Assayas</td>\n",
       "      <td>81.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>107.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>Béatrice Dalle</td>\n",
       "      <td>576.0</td>\n",
       "      <td>136007.0</td>\n",
       "      <td>Drama|Music|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>39.0</td>\n",
       "      <td>French</td>\n",
       "      <td>France</td>\n",
       "      <td>R</td>\n",
       "      <td>4500.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>133.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>2.35</td>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5027</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jafar Panahi</td>\n",
       "      <td>64.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>397.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Nargess Mamizadeh</td>\n",
       "      <td>5.0</td>\n",
       "      <td>673780.0</td>\n",
       "      <td>Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>26.0</td>\n",
       "      <td>Persian</td>\n",
       "      <td>Iran</td>\n",
       "      <td>Not Rated</td>\n",
       "      <td>10000.0</td>\n",
       "      <td>2000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>1.85</td>\n",
       "      <td>697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5033</th>\n",
       "      <td>Color</td>\n",
       "      <td>Shane Carruth</td>\n",
       "      <td>143.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>291.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>David Sullivan</td>\n",
       "      <td>291.0</td>\n",
       "      <td>424760.0</td>\n",
       "      <td>Drama|Sci-Fi|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>371.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>1.85</td>\n",
       "      <td>19000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5035</th>\n",
       "      <td>Color</td>\n",
       "      <td>Robert Rodriguez</td>\n",
       "      <td>56.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Peter Marquardt</td>\n",
       "      <td>121.0</td>\n",
       "      <td>2040920.0</td>\n",
       "      <td>Action|Crime|Drama|Romance|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>130.0</td>\n",
       "      <td>Spanish</td>\n",
       "      <td>USA</td>\n",
       "      <td>R</td>\n",
       "      <td>7000.0</td>\n",
       "      <td>1992.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>6.9</td>\n",
       "      <td>1.37</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3756 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5026  Color    Olivier Assayas                    81.0     110.0   \n",
       "5027  Color       Jafar Panahi                    64.0      90.0   \n",
       "5033  Color      Shane Carruth                   143.0      77.0   \n",
       "5035  Color   Robert Rodriguez                    56.0      81.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes       actor_2_name  \\\n",
       "0                         0.0                   855.0   Joel David Moore   \n",
       "1                       563.0                  1000.0      Orlando Bloom   \n",
       "2                         0.0                   161.0       Rory Kinnear   \n",
       "3                     22000.0                 23000.0     Christian Bale   \n",
       "5                       475.0                   530.0    Samantha Morton   \n",
       "...                       ...                     ...                ...   \n",
       "5026                    107.0                    45.0     Béatrice Dalle   \n",
       "5027                    397.0                     0.0  Nargess Mamizadeh   \n",
       "5033                    291.0                     8.0     David Sullivan   \n",
       "5035                      0.0                     6.0    Peter Marquardt   \n",
       "5042                     16.0                    16.0   Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross  \\\n",
       "0                     1000.0  760505847.0   \n",
       "1                    40000.0  309404152.0   \n",
       "2                    11000.0  200074175.0   \n",
       "3                    27000.0  448130642.0   \n",
       "5                      640.0   73058679.0   \n",
       "...                      ...          ...   \n",
       "5026                   576.0     136007.0   \n",
       "5027                     5.0     673780.0   \n",
       "5033                   291.0     424760.0   \n",
       "5035                   121.0    2040920.0   \n",
       "5042                    86.0      85222.0   \n",
       "\n",
       "                                   genres  ... num_user_for_reviews language  \\\n",
       "0         Action|Adventure|Fantasy|Sci-Fi  ...               3054.0  English   \n",
       "1                Action|Adventure|Fantasy  ...               1238.0  English   \n",
       "2               Action|Adventure|Thriller  ...                994.0  English   \n",
       "3                         Action|Thriller  ...               2701.0  English   \n",
       "5                 Action|Adventure|Sci-Fi  ...                738.0  English   \n",
       "...                                   ...  ...                  ...      ...   \n",
       "5026                  Drama|Music|Romance  ...                 39.0   French   \n",
       "5027                                Drama  ...                 26.0  Persian   \n",
       "5033                Drama|Sci-Fi|Thriller  ...                371.0  English   \n",
       "5035  Action|Crime|Drama|Romance|Thriller  ...                130.0  Spanish   \n",
       "5042                          Documentary  ...                 84.0  English   \n",
       "\n",
       "      country  content_rating       budget  title_year actor_2_facebook_likes  \\\n",
       "0         USA           PG-13  237000000.0      2009.0                  936.0   \n",
       "1         USA           PG-13  300000000.0      2007.0                 5000.0   \n",
       "2          UK           PG-13  245000000.0      2015.0                  393.0   \n",
       "3         USA           PG-13  250000000.0      2012.0                23000.0   \n",
       "5         USA           PG-13  263700000.0      2012.0                  632.0   \n",
       "...       ...             ...          ...         ...                    ...   \n",
       "5026   France               R       4500.0      2004.0                  133.0   \n",
       "5027     Iran       Not Rated      10000.0      2000.0                    0.0   \n",
       "5033      USA           PG-13       7000.0      2004.0                   45.0   \n",
       "5035      USA               R       7000.0      1992.0                   20.0   \n",
       "5042      USA              PG       1100.0      2004.0                   23.0   \n",
       "\n",
       "     imdb_score  aspect_ratio movie_facebook_likes  \n",
       "0           7.9          1.78                33000  \n",
       "1           7.1          2.35                    0  \n",
       "2           6.8          2.35                85000  \n",
       "3           8.5          2.35               164000  \n",
       "5           6.6          2.35                24000  \n",
       "...         ...           ...                  ...  \n",
       "5026        6.9          2.35                  171  \n",
       "5027        7.5          1.85                  697  \n",
       "5033        7.0          1.85                19000  \n",
       "5035        6.9          1.37                    0  \n",
       "5042        6.6          1.85                  456  \n",
       "\n",
       "[3756 rows x 28 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "952b95c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Color</td>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>462.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>530.0</td>\n",
       "      <td>Samantha Morton</td>\n",
       "      <td>640.0</td>\n",
       "      <td>73058679.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>738.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>263700000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>632.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>2.35</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Color</td>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>1.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "      <td>637.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>English</td>\n",
       "      <td>Canada</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>470.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>Color</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>43.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>319.0</td>\n",
       "      <td>Valorie Curry</td>\n",
       "      <td>841.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Crime|Drama|Mystery|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>359.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>TV-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>593.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>16.00</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Color</td>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>13.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Color</td>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>14.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "      <td>946.0</td>\n",
       "      <td>10443.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.35</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5036 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews  duration  \\\n",
       "0     Color      James Cameron                   723.0     178.0   \n",
       "1     Color     Gore Verbinski                   302.0     169.0   \n",
       "2     Color         Sam Mendes                   602.0     148.0   \n",
       "3     Color  Christopher Nolan                   813.0     164.0   \n",
       "5     Color     Andrew Stanton                   462.0     132.0   \n",
       "...     ...                ...                     ...       ...   \n",
       "5038  Color        Scott Smith                     1.0      87.0   \n",
       "5039  Color                NaN                    43.0      43.0   \n",
       "5040  Color   Benjamin Roberds                    13.0      76.0   \n",
       "5041  Color        Daniel Hsia                    14.0     100.0   \n",
       "5042  Color           Jon Gunn                    43.0      90.0   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                         0.0                   855.0  Joel David Moore   \n",
       "1                       563.0                  1000.0     Orlando Bloom   \n",
       "2                         0.0                   161.0      Rory Kinnear   \n",
       "3                     22000.0                 23000.0    Christian Bale   \n",
       "5                       475.0                   530.0   Samantha Morton   \n",
       "...                       ...                     ...               ...   \n",
       "5038                      2.0                   318.0     Daphne Zuniga   \n",
       "5039                      NaN                   319.0     Valorie Curry   \n",
       "5040                      0.0                     0.0     Maxwell Moody   \n",
       "5041                      0.0                   489.0     Daniel Henney   \n",
       "5042                     16.0                    16.0  Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross                           genres  \\\n",
       "0                     1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0         Action|Adventure|Fantasy   \n",
       "2                    11000.0  200074175.0        Action|Adventure|Thriller   \n",
       "3                    27000.0  448130642.0                  Action|Thriller   \n",
       "5                      640.0   73058679.0          Action|Adventure|Sci-Fi   \n",
       "...                      ...          ...                              ...   \n",
       "5038                   637.0          NaN                     Comedy|Drama   \n",
       "5039                   841.0          NaN     Crime|Drama|Mystery|Thriller   \n",
       "5040                     0.0          NaN            Drama|Horror|Thriller   \n",
       "5041                   946.0      10443.0             Comedy|Drama|Romance   \n",
       "5042                    86.0      85222.0                      Documentary   \n",
       "\n",
       "      ... num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0     ...               3054.0  English      USA           PG-13  237000000.0   \n",
       "1     ...               1238.0  English      USA           PG-13  300000000.0   \n",
       "2     ...                994.0  English       UK           PG-13  245000000.0   \n",
       "3     ...               2701.0  English      USA           PG-13  250000000.0   \n",
       "5     ...                738.0  English      USA           PG-13  263700000.0   \n",
       "...   ...                  ...      ...      ...             ...          ...   \n",
       "5038  ...                  6.0  English   Canada             NaN          NaN   \n",
       "5039  ...                359.0  English      USA           TV-14          NaN   \n",
       "5040  ...                  3.0  English      USA             NaN       1400.0   \n",
       "5041  ...                  9.0  English      USA           PG-13          NaN   \n",
       "5042  ...                 84.0  English      USA              PG       1100.0   \n",
       "\n",
       "      title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0         2009.0                  936.0        7.9          1.78   \n",
       "1         2007.0                 5000.0        7.1          2.35   \n",
       "2         2015.0                  393.0        6.8          2.35   \n",
       "3         2012.0                23000.0        8.5          2.35   \n",
       "5         2012.0                  632.0        6.6          2.35   \n",
       "...          ...                    ...        ...           ...   \n",
       "5038      2013.0                  470.0        7.7           NaN   \n",
       "5039         NaN                  593.0        7.5         16.00   \n",
       "5040      2013.0                    0.0        6.3           NaN   \n",
       "5041      2012.0                  719.0        6.3          2.35   \n",
       "5042      2004.0                   23.0        6.6          1.85   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "2                   85000  \n",
       "3                  164000  \n",
       "5                   24000  \n",
       "...                   ...  \n",
       "5038                   84  \n",
       "5039                32000  \n",
       "5040                   16  \n",
       "5041                  660  \n",
       "5042                  456  \n",
       "\n",
       "[5036 rows x 28 columns]"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(thresh=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "26ae5a35",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>title_year</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.000000</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000.0</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000.0</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.000000</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000.0</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>107.201074</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Color</td>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>1.0</td>\n",
       "      <td>87.000000</td>\n",
       "      <td>2.0</td>\n",
       "      <td>318.0</td>\n",
       "      <td>Daphne Zuniga</td>\n",
       "      <td>637.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>English</td>\n",
       "      <td>Canada</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>470.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>Color</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43.0</td>\n",
       "      <td>43.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>319.0</td>\n",
       "      <td>Valorie Curry</td>\n",
       "      <td>841.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Crime|Drama|Mystery|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>359.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>TV-14</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>593.0</td>\n",
       "      <td>7.5</td>\n",
       "      <td>16.00</td>\n",
       "      <td>32000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Color</td>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>13.0</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Maxwell Moody</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>3.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Color</td>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>14.0</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>Daniel Henney</td>\n",
       "      <td>946.0</td>\n",
       "      <td>10443.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>...</td>\n",
       "      <td>9.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>719.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>2.35</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Color</td>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>43.0</td>\n",
       "      <td>90.000000</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Brian Herzlinger</td>\n",
       "      <td>86.0</td>\n",
       "      <td>85222.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG</td>\n",
       "      <td>1100.0</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>1.85</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5043 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      color      director_name  num_critic_for_reviews    duration  \\\n",
       "0     Color      James Cameron                   723.0  178.000000   \n",
       "1     Color     Gore Verbinski                   302.0  169.000000   \n",
       "2     Color         Sam Mendes                   602.0  148.000000   \n",
       "3     Color  Christopher Nolan                   813.0  164.000000   \n",
       "4       NaN        Doug Walker                     NaN  107.201074   \n",
       "...     ...                ...                     ...         ...   \n",
       "5038  Color        Scott Smith                     1.0   87.000000   \n",
       "5039  Color                NaN                    43.0   43.000000   \n",
       "5040  Color   Benjamin Roberds                    13.0   76.000000   \n",
       "5041  Color        Daniel Hsia                    14.0  100.000000   \n",
       "5042  Color           Jon Gunn                    43.0   90.000000   \n",
       "\n",
       "      director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                         0.0                   855.0  Joel David Moore   \n",
       "1                       563.0                  1000.0     Orlando Bloom   \n",
       "2                         0.0                   161.0      Rory Kinnear   \n",
       "3                     22000.0                 23000.0    Christian Bale   \n",
       "4                       131.0                     NaN        Rob Walker   \n",
       "...                       ...                     ...               ...   \n",
       "5038                      2.0                   318.0     Daphne Zuniga   \n",
       "5039                      NaN                   319.0     Valorie Curry   \n",
       "5040                      0.0                     0.0     Maxwell Moody   \n",
       "5041                      0.0                   489.0     Daniel Henney   \n",
       "5042                     16.0                    16.0  Brian Herzlinger   \n",
       "\n",
       "      actor_1_facebook_likes        gross                           genres  \\\n",
       "0                     1000.0  760505847.0  Action|Adventure|Fantasy|Sci-Fi   \n",
       "1                    40000.0  309404152.0         Action|Adventure|Fantasy   \n",
       "2                    11000.0  200074175.0        Action|Adventure|Thriller   \n",
       "3                    27000.0  448130642.0                  Action|Thriller   \n",
       "4                      131.0          NaN                      Documentary   \n",
       "...                      ...          ...                              ...   \n",
       "5038                   637.0          NaN                     Comedy|Drama   \n",
       "5039                   841.0          NaN     Crime|Drama|Mystery|Thriller   \n",
       "5040                     0.0          NaN            Drama|Horror|Thriller   \n",
       "5041                   946.0      10443.0             Comedy|Drama|Romance   \n",
       "5042                    86.0      85222.0                      Documentary   \n",
       "\n",
       "      ... num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0     ...               3054.0  English      USA           PG-13  237000000.0   \n",
       "1     ...               1238.0  English      USA           PG-13  300000000.0   \n",
       "2     ...                994.0  English       UK           PG-13  245000000.0   \n",
       "3     ...               2701.0  English      USA           PG-13  250000000.0   \n",
       "4     ...                  NaN      NaN      NaN             NaN          NaN   \n",
       "...   ...                  ...      ...      ...             ...          ...   \n",
       "5038  ...                  6.0  English   Canada             NaN          NaN   \n",
       "5039  ...                359.0  English      USA           TV-14          NaN   \n",
       "5040  ...                  3.0  English      USA             NaN       1400.0   \n",
       "5041  ...                  9.0  English      USA           PG-13          NaN   \n",
       "5042  ...                 84.0  English      USA              PG       1100.0   \n",
       "\n",
       "      title_year actor_2_facebook_likes imdb_score  aspect_ratio  \\\n",
       "0         2009.0                  936.0        7.9          1.78   \n",
       "1         2007.0                 5000.0        7.1          2.35   \n",
       "2         2015.0                  393.0        6.8          2.35   \n",
       "3         2012.0                23000.0        8.5          2.35   \n",
       "4            NaN                   12.0        7.1           NaN   \n",
       "...          ...                    ...        ...           ...   \n",
       "5038      2013.0                  470.0        7.7           NaN   \n",
       "5039         NaN                  593.0        7.5         16.00   \n",
       "5040      2013.0                    0.0        6.3           NaN   \n",
       "5041      2012.0                  719.0        6.3          2.35   \n",
       "5042      2004.0                   23.0        6.6          1.85   \n",
       "\n",
       "     movie_facebook_likes  \n",
       "0                   33000  \n",
       "1                       0  \n",
       "2                   85000  \n",
       "3                  164000  \n",
       "4                       0  \n",
       "...                   ...  \n",
       "5038                   84  \n",
       "5039                32000  \n",
       "5040                   16  \n",
       "5041                  660  \n",
       "5042                  456  \n",
       "\n",
       "[5043 rows x 28 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(how=\"all\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "ff4d79ba",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    4935\n",
       "True      108\n",
       "Name: title_year, dtype: int64"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.title_year.isnull().value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "2c63e542",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.dropna(subset=['title_year'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "c3494ebd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    4935\n",
       "Name: title_year, dtype: int64"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.title_year.isnull().value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "5050303b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>director_name</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>genres</th>\n",
       "      <th>movie_title</th>\n",
       "      <th>num_voted_users</th>\n",
       "      <th>cast_total_facebook_likes</th>\n",
       "      <th>movie_imdb_link</th>\n",
       "      <th>title_year</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>movie_facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>James Cameron</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>Avatar</td>\n",
       "      <td>886204</td>\n",
       "      <td>4834</td>\n",
       "      <td>http://www.imdb.com/title/tt0499549/?ref_=fn_t...</td>\n",
       "      <td>2009.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
       "      <td>471220</td>\n",
       "      <td>48350</td>\n",
       "      <td>http://www.imdb.com/title/tt0449088/?ref_=fn_t...</td>\n",
       "      <td>2007.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>Spectre</td>\n",
       "      <td>275868</td>\n",
       "      <td>11700</td>\n",
       "      <td>http://www.imdb.com/title/tt2379713/?ref_=fn_t...</td>\n",
       "      <td>2015.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>1144337</td>\n",
       "      <td>106759</td>\n",
       "      <td>http://www.imdb.com/title/tt1345836/?ref_=fn_t...</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Andrew Stanton</td>\n",
       "      <td>132.0</td>\n",
       "      <td>475.0</td>\n",
       "      <td>Action|Adventure|Sci-Fi</td>\n",
       "      <td>John Carter</td>\n",
       "      <td>212204</td>\n",
       "      <td>1873</td>\n",
       "      <td>http://www.imdb.com/title/tt0401729/?ref_=fn_t...</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>24000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5037</th>\n",
       "      <td>Edward Burns</td>\n",
       "      <td>95.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>Newlyweds</td>\n",
       "      <td>1338</td>\n",
       "      <td>690</td>\n",
       "      <td>http://www.imdb.com/title/tt1880418/?ref_=fn_t...</td>\n",
       "      <td>2011.0</td>\n",
       "      <td>6.4</td>\n",
       "      <td>413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Scott Smith</td>\n",
       "      <td>87.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Comedy|Drama</td>\n",
       "      <td>Signed Sealed Delivered</td>\n",
       "      <td>629</td>\n",
       "      <td>2283</td>\n",
       "      <td>http://www.imdb.com/title/tt3000844/?ref_=fn_t...</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>7.7</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>Benjamin Roberds</td>\n",
       "      <td>76.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Drama|Horror|Thriller</td>\n",
       "      <td>A Plague So Pleasant</td>\n",
       "      <td>38</td>\n",
       "      <td>0</td>\n",
       "      <td>http://www.imdb.com/title/tt2107644/?ref_=fn_t...</td>\n",
       "      <td>2013.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Daniel Hsia</td>\n",
       "      <td>100.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>Comedy|Drama|Romance</td>\n",
       "      <td>Shanghai Calling</td>\n",
       "      <td>1255</td>\n",
       "      <td>2386</td>\n",
       "      <td>http://www.imdb.com/title/tt2070597/?ref_=fn_t...</td>\n",
       "      <td>2012.0</td>\n",
       "      <td>6.3</td>\n",
       "      <td>660</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>Jon Gunn</td>\n",
       "      <td>90.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>4285</td>\n",
       "      <td>163</td>\n",
       "      <td>http://www.imdb.com/title/tt0378407/?ref_=fn_t...</td>\n",
       "      <td>2004.0</td>\n",
       "      <td>6.6</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4935 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          director_name  duration  director_facebook_likes  \\\n",
       "0         James Cameron     178.0                      0.0   \n",
       "1        Gore Verbinski     169.0                    563.0   \n",
       "2            Sam Mendes     148.0                      0.0   \n",
       "3     Christopher Nolan     164.0                  22000.0   \n",
       "5        Andrew Stanton     132.0                    475.0   \n",
       "...                 ...       ...                      ...   \n",
       "5037       Edward Burns      95.0                      0.0   \n",
       "5038        Scott Smith      87.0                      2.0   \n",
       "5040   Benjamin Roberds      76.0                      0.0   \n",
       "5041        Daniel Hsia     100.0                      0.0   \n",
       "5042           Jon Gunn      90.0                     16.0   \n",
       "\n",
       "                               genres  \\\n",
       "0     Action|Adventure|Fantasy|Sci-Fi   \n",
       "1            Action|Adventure|Fantasy   \n",
       "2           Action|Adventure|Thriller   \n",
       "3                     Action|Thriller   \n",
       "5             Action|Adventure|Sci-Fi   \n",
       "...                               ...   \n",
       "5037                     Comedy|Drama   \n",
       "5038                     Comedy|Drama   \n",
       "5040            Drama|Horror|Thriller   \n",
       "5041             Comedy|Drama|Romance   \n",
       "5042                      Documentary   \n",
       "\n",
       "                                    movie_title  num_voted_users  \\\n",
       "0                                       Avatar            886204   \n",
       "1     Pirates of the Caribbean: At World's End            471220   \n",
       "2                                      Spectre            275868   \n",
       "3                        The Dark Knight Rises           1144337   \n",
       "5                                  John Carter            212204   \n",
       "...                                         ...              ...   \n",
       "5037                                 Newlyweds              1338   \n",
       "5038                   Signed Sealed Delivered               629   \n",
       "5040                      A Plague So Pleasant                38   \n",
       "5041                          Shanghai Calling              1255   \n",
       "5042                         My Date with Drew              4285   \n",
       "\n",
       "      cast_total_facebook_likes  \\\n",
       "0                          4834   \n",
       "1                         48350   \n",
       "2                         11700   \n",
       "3                        106759   \n",
       "5                          1873   \n",
       "...                         ...   \n",
       "5037                        690   \n",
       "5038                       2283   \n",
       "5040                          0   \n",
       "5041                       2386   \n",
       "5042                        163   \n",
       "\n",
       "                                        movie_imdb_link  title_year  \\\n",
       "0     http://www.imdb.com/title/tt0499549/?ref_=fn_t...      2009.0   \n",
       "1     http://www.imdb.com/title/tt0449088/?ref_=fn_t...      2007.0   \n",
       "2     http://www.imdb.com/title/tt2379713/?ref_=fn_t...      2015.0   \n",
       "3     http://www.imdb.com/title/tt1345836/?ref_=fn_t...      2012.0   \n",
       "5     http://www.imdb.com/title/tt0401729/?ref_=fn_t...      2012.0   \n",
       "...                                                 ...         ...   \n",
       "5037  http://www.imdb.com/title/tt1880418/?ref_=fn_t...      2011.0   \n",
       "5038  http://www.imdb.com/title/tt3000844/?ref_=fn_t...      2013.0   \n",
       "5040  http://www.imdb.com/title/tt2107644/?ref_=fn_t...      2013.0   \n",
       "5041  http://www.imdb.com/title/tt2070597/?ref_=fn_t...      2012.0   \n",
       "5042  http://www.imdb.com/title/tt0378407/?ref_=fn_t...      2004.0   \n",
       "\n",
       "      imdb_score  movie_facebook_likes  \n",
       "0            7.9                 33000  \n",
       "1            7.1                     0  \n",
       "2            6.8                 85000  \n",
       "3            8.5                164000  \n",
       "5            6.6                 24000  \n",
       "...          ...                   ...  \n",
       "5037         6.4                   413  \n",
       "5038         7.7                    84  \n",
       "5040         6.3                    16  \n",
       "5041         6.3                   660  \n",
       "5042         6.6                   456  \n",
       "\n",
       "[4935 rows x 11 columns]"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.dropna(axis=1, how='any')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "07c9b00d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 4935 entries, 0 to 5042\n",
      "Data columns (total 28 columns):\n",
      " #   Column                     Non-Null Count  Dtype  \n",
      "---  ------                     --------------  -----  \n",
      " 0   color                      4920 non-null   object \n",
      " 1   director_name              4935 non-null   object \n",
      " 2   num_critic_for_reviews     4894 non-null   float64\n",
      " 3   duration                   4935 non-null   float64\n",
      " 4   director_facebook_likes    4935 non-null   float64\n",
      " 5   actor_3_facebook_likes     4917 non-null   float64\n",
      " 6   actor_2_name               4925 non-null   object \n",
      " 7   actor_1_facebook_likes     4928 non-null   float64\n",
      " 8   gross                      4156 non-null   float64\n",
      " 9   genres                     4935 non-null   object \n",
      " 10  actor_1_name               4928 non-null   object \n",
      " 11  movie_title                4935 non-null   object \n",
      " 12  num_voted_users            4935 non-null   int64  \n",
      " 13  cast_total_facebook_likes  4935 non-null   int64  \n",
      " 14  actor_3_name               4917 non-null   object \n",
      " 15  facenumber_in_poster       4922 non-null   float64\n",
      " 16  plot_keywords              4795 non-null   object \n",
      " 17  movie_imdb_link            4935 non-null   object \n",
      " 18  num_user_for_reviews       4920 non-null   float64\n",
      " 19  language                   4926 non-null   object \n",
      " 20  country                    4934 non-null   object \n",
      " 21  content_rating             4674 non-null   object \n",
      " 22  budget                     4543 non-null   float64\n",
      " 23  title_year                 4935 non-null   float64\n",
      " 24  actor_2_facebook_likes     4925 non-null   float64\n",
      " 25  imdb_score                 4935 non-null   float64\n",
      " 26  aspect_ratio               4628 non-null   float64\n",
      " 27  movie_facebook_likes       4935 non-null   int64  \n",
      "dtypes: float64(13), int64(3), object(12)\n",
      "memory usage: 1.1+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "de3c34bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('./data/movie_metadata.csv', dtype={'title_year':str,'actor_1_facebook_likes':str})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "ddc46dcf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 5043 entries, 0 to 5042\n",
      "Data columns (total 28 columns):\n",
      " #   Column                     Non-Null Count  Dtype  \n",
      "---  ------                     --------------  -----  \n",
      " 0   color                      5024 non-null   object \n",
      " 1   director_name              4939 non-null   object \n",
      " 2   num_critic_for_reviews     4993 non-null   float64\n",
      " 3   duration                   5028 non-null   float64\n",
      " 4   director_facebook_likes    4939 non-null   float64\n",
      " 5   actor_3_facebook_likes     5020 non-null   float64\n",
      " 6   actor_2_name               5030 non-null   object \n",
      " 7   actor_1_facebook_likes     5036 non-null   object \n",
      " 8   gross                      4159 non-null   float64\n",
      " 9   genres                     5043 non-null   object \n",
      " 10  actor_1_name               5036 non-null   object \n",
      " 11  movie_title                5043 non-null   object \n",
      " 12  num_voted_users            5043 non-null   int64  \n",
      " 13  cast_total_facebook_likes  5043 non-null   int64  \n",
      " 14  actor_3_name               5020 non-null   object \n",
      " 15  facenumber_in_poster       5030 non-null   float64\n",
      " 16  plot_keywords              4890 non-null   object \n",
      " 17  movie_imdb_link            5043 non-null   object \n",
      " 18  num_user_for_reviews       5022 non-null   float64\n",
      " 19  language                   5031 non-null   object \n",
      " 20  country                    5038 non-null   object \n",
      " 21  content_rating             4740 non-null   object \n",
      " 22  budget                     4551 non-null   float64\n",
      " 23  title_year                 4935 non-null   object \n",
      " 24  actor_2_facebook_likes     5030 non-null   float64\n",
      " 25  imdb_score                 5043 non-null   float64\n",
      " 26  aspect_ratio               4714 non-null   float64\n",
      " 27  movie_facebook_likes       5043 non-null   int64  \n",
      "dtypes: float64(11), int64(3), object(14)\n",
      "memory usage: 1.1+ MB\n"
     ]
    }
   ],
   "source": [
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "5868c15f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Avatar</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Pirates of the Caribbean</td>\n",
       "      <td>At World's End</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Spectre</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>The Dark Knight Rises</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Star Wars</td>\n",
       "      <td>Episode VII - The Force Awakens</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5038</th>\n",
       "      <td>Signed Sealed Delivered</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5039</th>\n",
       "      <td>The Following</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5040</th>\n",
       "      <td>A Plague So Pleasant</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5041</th>\n",
       "      <td>Shanghai Calling</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5042</th>\n",
       "      <td>My Date with Drew</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5043 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                               0  \\\n",
       "0                        Avatar    \n",
       "1       Pirates of the Caribbean   \n",
       "2                       Spectre    \n",
       "3         The Dark Knight Rises    \n",
       "4                      Star Wars   \n",
       "...                          ...   \n",
       "5038    Signed Sealed Delivered    \n",
       "5039  The Following                \n",
       "5040       A Plague So Pleasant    \n",
       "5041           Shanghai Calling    \n",
       "5042          My Date with Drew    \n",
       "\n",
       "                                                  1  \n",
       "0                                              None  \n",
       "1                                   At World's End   \n",
       "2                                              None  \n",
       "3                                              None  \n",
       "4      Episode VII - The Force Awakens               \n",
       "...                                             ...  \n",
       "5038                                           None  \n",
       "5039                                           None  \n",
       "5040                                           None  \n",
       "5041                                           None  \n",
       "5042                                           None  \n",
       "\n",
       "[5043 rows x 2 columns]"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.movie_title.str.split(\":\",expand=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "a327ffb4",
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data.rename(columns={'title_year':'release_date',\n",
    " 'movie_facebook_likes':'facebook_likes'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "2fce1593",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>color</th>\n",
       "      <th>director_name</th>\n",
       "      <th>num_critic_for_reviews</th>\n",
       "      <th>duration</th>\n",
       "      <th>director_facebook_likes</th>\n",
       "      <th>actor_3_facebook_likes</th>\n",
       "      <th>actor_2_name</th>\n",
       "      <th>actor_1_facebook_likes</th>\n",
       "      <th>gross</th>\n",
       "      <th>genres</th>\n",
       "      <th>...</th>\n",
       "      <th>num_user_for_reviews</th>\n",
       "      <th>language</th>\n",
       "      <th>country</th>\n",
       "      <th>content_rating</th>\n",
       "      <th>budget</th>\n",
       "      <th>release_date</th>\n",
       "      <th>actor_2_facebook_likes</th>\n",
       "      <th>imdb_score</th>\n",
       "      <th>aspect_ratio</th>\n",
       "      <th>facebook_likes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Color</td>\n",
       "      <td>James Cameron</td>\n",
       "      <td>723.0</td>\n",
       "      <td>178.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>855.0</td>\n",
       "      <td>Joel David Moore</td>\n",
       "      <td>1000</td>\n",
       "      <td>760505847.0</td>\n",
       "      <td>Action|Adventure|Fantasy|Sci-Fi</td>\n",
       "      <td>...</td>\n",
       "      <td>3054.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>237000000.0</td>\n",
       "      <td>2009</td>\n",
       "      <td>936.0</td>\n",
       "      <td>7.9</td>\n",
       "      <td>1.78</td>\n",
       "      <td>33000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Color</td>\n",
       "      <td>Gore Verbinski</td>\n",
       "      <td>302.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>563.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>Orlando Bloom</td>\n",
       "      <td>40000</td>\n",
       "      <td>309404152.0</td>\n",
       "      <td>Action|Adventure|Fantasy</td>\n",
       "      <td>...</td>\n",
       "      <td>1238.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>300000000.0</td>\n",
       "      <td>2007</td>\n",
       "      <td>5000.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>2.35</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Color</td>\n",
       "      <td>Sam Mendes</td>\n",
       "      <td>602.0</td>\n",
       "      <td>148.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>161.0</td>\n",
       "      <td>Rory Kinnear</td>\n",
       "      <td>11000</td>\n",
       "      <td>200074175.0</td>\n",
       "      <td>Action|Adventure|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>994.0</td>\n",
       "      <td>English</td>\n",
       "      <td>UK</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>245000000.0</td>\n",
       "      <td>2015</td>\n",
       "      <td>393.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>2.35</td>\n",
       "      <td>85000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Color</td>\n",
       "      <td>Christopher Nolan</td>\n",
       "      <td>813.0</td>\n",
       "      <td>164.0</td>\n",
       "      <td>22000.0</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>Christian Bale</td>\n",
       "      <td>27000</td>\n",
       "      <td>448130642.0</td>\n",
       "      <td>Action|Thriller</td>\n",
       "      <td>...</td>\n",
       "      <td>2701.0</td>\n",
       "      <td>English</td>\n",
       "      <td>USA</td>\n",
       "      <td>PG-13</td>\n",
       "      <td>250000000.0</td>\n",
       "      <td>2012</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>8.5</td>\n",
       "      <td>2.35</td>\n",
       "      <td>164000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>Doug Walker</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>131.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Rob Walker</td>\n",
       "      <td>131</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Documentary</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>7.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   color      director_name  num_critic_for_reviews  duration  \\\n",
       "0  Color      James Cameron                   723.0     178.0   \n",
       "1  Color     Gore Verbinski                   302.0     169.0   \n",
       "2  Color         Sam Mendes                   602.0     148.0   \n",
       "3  Color  Christopher Nolan                   813.0     164.0   \n",
       "4    NaN        Doug Walker                     NaN       NaN   \n",
       "\n",
       "   director_facebook_likes  actor_3_facebook_likes      actor_2_name  \\\n",
       "0                      0.0                   855.0  Joel David Moore   \n",
       "1                    563.0                  1000.0     Orlando Bloom   \n",
       "2                      0.0                   161.0      Rory Kinnear   \n",
       "3                  22000.0                 23000.0    Christian Bale   \n",
       "4                    131.0                     NaN        Rob Walker   \n",
       "\n",
       "  actor_1_facebook_likes        gross                           genres  ...  \\\n",
       "0                   1000  760505847.0  Action|Adventure|Fantasy|Sci-Fi  ...   \n",
       "1                  40000  309404152.0         Action|Adventure|Fantasy  ...   \n",
       "2                  11000  200074175.0        Action|Adventure|Thriller  ...   \n",
       "3                  27000  448130642.0                  Action|Thriller  ...   \n",
       "4                    131          NaN                      Documentary  ...   \n",
       "\n",
       "  num_user_for_reviews language  country  content_rating       budget  \\\n",
       "0               3054.0  English      USA           PG-13  237000000.0   \n",
       "1               1238.0  English      USA           PG-13  300000000.0   \n",
       "2                994.0  English       UK           PG-13  245000000.0   \n",
       "3               2701.0  English      USA           PG-13  250000000.0   \n",
       "4                  NaN      NaN      NaN             NaN          NaN   \n",
       "\n",
       "   release_date actor_2_facebook_likes imdb_score  aspect_ratio facebook_likes  \n",
       "0          2009                  936.0        7.9          1.78          33000  \n",
       "1          2007                 5000.0        7.1          2.35              0  \n",
       "2          2015                  393.0        6.8          2.35          85000  \n",
       "3          2012                23000.0        8.5          2.35         164000  \n",
       "4           NaN                   12.0        7.1           NaN              0  \n",
       "\n",
       "[5 rows x 28 columns]"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "ec755587",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_csv(\"./data/newData.csv\",index=False,encoding=\"utf8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "id": "44efcbf9",
   "metadata": {},
   "outputs": [],
   "source": [
    "new_data = data.groupby(\"director_name\")['duration'].mean().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "00676b8d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
